library(Seurat)
## Loading required package: SeuratObject
## Warning: package 'SeuratObject' was built under R version 4.3.3
## Loading required package: sp
##
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
##
## intersect, t
library(SuperSpot)
library(SuperCell)
library(tidyr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(igraph)
##
## Attaching package: 'igraph'
##
## The following objects are masked from 'package:lubridate':
##
## %--%, union
##
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
##
## The following objects are masked from 'package:purrr':
##
## compose, simplify
##
## The following object is masked from 'package:tibble':
##
## as_data_frame
##
## The following object is masked from 'package:tidyr':
##
## crossing
##
## The following object is masked from 'package:Seurat':
##
## components
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
## The following object is masked from 'package:base':
##
## union
Download files from https://zenodo.org/records/8327576. You need “well7_5raw_expression_pd.csv”, “metadata.csv” and “well7_5_spatial.csv”.
wget https://zenodo.org/records/8327576/files/well7_5raw_expression_pd.csv
wget https://zenodo.org/records/8327576/files/metadata.csv
wget https://zenodo.org/records/8327576/files/well7_5_spatial.csv
well7.mtx <- read_csv("./well7_5raw_expression_pd.csv") %>% column_to_rownames("GENE")
## Rows: 1022 Columns: 44976
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GENE
## dbl (44975): well7_5_0, well7_5_1, well7_5_2, well7_5_3, well7_5_4, well7_5_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
well7.mtx[1:5,1:5]
## well7_5_0 well7_5_1 well7_5_2 well7_5_3 well7_5_4
## A2M 0 0 0 0 0
## ABCC9 0 0 0 0 0
## ABI3BP 0 0 0 0 0
## ACBD7 0 0 0 0 0
## ACTA2 0 0 0 0 0
md <- read_csv("./metadata.csv")
## Rows: 1091281 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): NAME, biosample_id, donor_id, species, species__ontology_label, di...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
md[1:5,1:5]
## # A tibble: 5 × 5
## NAME biosample_id donor_id species species__ontology_label
## <chr> <chr> <chr> <chr> <chr>
## 1 TYPE group group group group
## 2 sagittal3_0 1 A NCBITaxon:10090 Mus musculus
## 3 sagittal3_1 1 A NCBITaxon:10090 Mus musculus
## 4 sagittal3_2 1 A NCBITaxon:10090 Mus musculus
## 5 sagittal3_3 1 A NCBITaxon:10090 Mus musculus
well7.md <- md[md$NAME %in% colnames(well7.mtx) == T,]
well7.md[1:5,1:5]
## # A tibble: 5 × 5
## NAME biosample_id donor_id species species__ontology_label
## <chr> <chr> <chr> <chr> <chr>
## 1 well7_5_0 15 C NCBITaxon:10090 Mus musculus
## 2 well7_5_1 15 C NCBITaxon:10090 Mus musculus
## 3 well7_5_2 15 C NCBITaxon:10090 Mus musculus
## 4 well7_5_3 15 C NCBITaxon:10090 Mus musculus
## 5 well7_5_4 15 C NCBITaxon:10090 Mus musculus
well7.spatial <- read_csv("./well7_5_spatial.csv")
## Rows: 44976 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): NAME, X, Y, Z, Main_molecular_cell_type, Sub_molecular_cell_type, M...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
well7.spatial <- well7.spatial[-1,]
spotPosition <- dplyr::select(well7.spatial,c("NAME","X","Y")) %>% column_to_rownames("NAME")
colnames(spotPosition) <- c("imagerow","imagecol")
spotPosition$imagerow <- as.numeric(spotPosition$imagerow)
spotPosition$imagecol <- as.numeric(spotPosition$imagecol)
spotPosition <- select(spotPosition,c("imagerow","imagecol"))
The function “SCimplify_SpatialDLS” uses the raw count matrix and the spatial coordinates of the spots to build the metaspots. You can choose to split or not the the connections between the spots that have a higher distance compared to the other ones with “split_not_connected”. You can also split the metaspots based on a provided annotation with “cell.annotation” parameter (i.e.,metaspots containing spots from different cell types/regions will be split resulting in one metaspot for each cell type/region). The main output here is the membership given to each spot to know in which metaspot it is assigned.
g = 10 # gamma
n.pc = 1:5 # number of first PC to use
k.knn = 16 # number of neighbors to connect to each spot
print("Creating metaspots")
## [1] "Creating metaspots"
# By default, SCimplify_SpatialDLS computes distances in a parallalized way. By default, all the available cpus are used. If your computer doesn't support, you can change the number of cpus with the paramater "n.cpu"
MC.well7_DLS <- SCimplify_SpatialDLS(X = well7.mtx ,
spotPositions = spotPosition ,
method_similarity = "1",
split_not_connected = T,
genes.use = rownames(well7.mtx),
gamma = g,
n.pc = n.pc,
method_knn = "1",
k.knn = k.knn,
method_normalization = "log_normalize",
cell.annotation = well7.md$Main_molecular_cell_type)
## [1] "Building KNN graph with nn2"
## [1] "Neighbors with distance > 189.761884073383 are removed"
## [1] "Maximum gamma is 138.384615384615"
## [1] "Done"
## Warning: Data is of class data.frame. Coercing to dgCMatrix.
## [1] "Performing Log Normalization"
## Normalizing layer: counts
## [1] "Done"
## [1] "Running PCA"
## Finding variable features for layer counts
## Centering and scaling data matrix
## [1] "Done"
## [1] "Computing PCA dist for each edge"
## [1] "Done"
## [1] "Computing similarity from PCA distances"
## [1] "Done"
## [1] "Returning graph with PCA similarity as weight"
## [1] "Clustering"
## [1] "Done"
print("Done")
## [1] "Done"
well7.md[,str_c("MC_membership_",g)] <- MC.well7_DLS$membership %>% as.character()
The major quality control for metaspots is purity (proportion of the most abundant cell type/region within each metaspot). In the case where we decided to split the metaspots with the paramater “cell.annotation”, the purity should be equal to 1.
#We compute the purity for each metaspot
method_purity <- c("max_proportion", "entropy")[1]
MC.well7_DLS$purity <- supercell_purity(
clusters = well7.md$Main_molecular_cell_type,
supercell_membership = MC.well7_DLS$membership,
method = method_purity
)
print(str_c("mean purity is ",mean(MC.well7_DLS$purity)))
## [1] "mean purity is 1"
#We assign each metaspot with its corresponding annotation
MC.well7_DLS$Main_molecular_cell_type <- supercell_assign(clusters = well7.md$Main_molecular_cell_type,
supercell_membership = MC.well7_DLS$membership,
method = "absolute")
SuperSpot come with its own way to visualize the metapots. The function “supercell_metaspots_shape” first builds the polygons representing the metaspots covering the original spots.
print("Creating polygons for visualization")
## [1] "Creating polygons for visualization"
MC.well7_DLS$polygons <- supercell_metaspots_shape(MC = MC.well7_DLS,
spotpositions = spotPosition,
annotation = "Main_molecular_cell_type",
concavity = 2,
membership_name = "membership")
print("Done")
## [1] "Done"
SpatialDimPlotSC(original_coord = spotPosition,
MC = MC.well7_DLS,
sc.col = "Main_molecular_cell_type",
sc.col2 = str_c("MC_membership_",g),
polygons_col = "polygons",
meta_data = well7.md)+
NoLegend()